/*********************************************************************
Replication code for Systemic Discrimination Among Large U.S. Employers
Patrick M. Kline, Evan K. Rose, Christopher R. Walters
April, 2022

This code produces Table A3, which reports tests of the relationship 
between standardized contact gaps and standard errors.

It requires the top-level directory to be set to the replication folder
using the global below.
*********************************************************************/

*Load data
*Root of the replication package; edit this path to point at your local copy.
global dir "/accounts/projects/pkline/randres/randres/replication"
*Restore any dataset still held by an earlier preserve (the error is ignored if
*there is none), then reset memory.
capture restore
clear all
*Fix the random-number seed so the random sample splits further down are reproducible.
set seed 101485

*The path is quoted so that a replication folder whose path contains spaces still loads.
use "${dir}/data/data.dta", clear
    
*Constructed variables
*Firm-level mean of cb, a constant weight, and an id for each firm-by-job cell.
bysort firm_id: egen mean_cb = mean(cb)
generate w = 1
egen jid = group(firm_id job_id)

*Create variables
*Y copies cb, W equals 1 - black, and f numbers the firms 1, 2, ... in firm_id order.
generate Y = cb
generate W = 1 - black
egen f = group(firm_id)
*The largest value of f is the number of firms; it is kept in local F for later loops.
summarize f, detail
local F = r(max)
*One indicator variable per firm, named f_1, f_2, and so on.
quietly tabulate f, generate(f_)

*Collapse down to job-level mean diffs
*Per-job sums of Y and per-job row counts, separately for the W==1 and W==0 groups.
generate Y_1 = Y * W
generate Y_0 = Y * (1 - W)
bysort job_id: egen C_1 = sum(Y_1)
bysort job_id: egen C_0 = sum(Y_0)
bysort job_id: egen N_1 = sum(W)
bysort job_id: generate N_0 = _N - N_1
*Within-job difference in mean Y between the W==1 rows and the W==0 rows.
generate delta_fj = (C_1 / N_1) - (C_0 / N_0)
*Discard jobs with no rows in one of the two groups, then keep a single row per job.
drop if N_1 <= 0 | N_0 <= 0
bysort job_id: keep if _n == 1

*Save data file
*The job-level file is reloaded at the start of every repeated-split trial.
tempfile tempsave
save "`tempsave'", replace


**********************************
******* FULL SAMPLE ESTIMATES ****
**********************************	

*Estimate firm contact gaps
*The job-level gap is regressed on the full set of firm indicators with no constant,
*using robust standard errors, so there is one coefficient per firm.
generate delta_f = .
generate s_f = .
quietly regress delta_fj f_*, vce(robust) noconstant
*Copy each firm's coefficient and standard error onto that firm's rows.
forvalues k = 1/`F' {
	replace delta_f = _b[f_`k'] if f == `k'
	replace s_f = _se[f_`k'] if f == `k'
}
*Standardized gap: estimate divided by its standard error.
generate z_f = delta_f / s_f

*Regress z and z-squared on s
*Runs on one row per firm; preserve and restore bring the job-level data back afterwards.
preserve
bysort firm_id: keep if _n == 1
regress z_f s_f, vce(robust)
*The squared residuals of the first regression are then regressed on the standard error.
predict r, residuals
generate r_sq = r^2
regress r_sq s_f, vce(robust)
restore


**********************************
******* SPLIT SAMPLE ESTIMATES ****
**********************************	

****Estimates for a single split

	***Randomly select half of jobs for each firm
	*Jobs are ordered within firm by a uniform draw; the first half is flagged split==1.
	generate u = uniform()
	sort firm_id u
	by firm_id: generate split = (_n <= _N / 2)
	
	***Estimate contact gaps in each split
	*For each half, store every firm's z-score and standard error on that firm's rows.
	forvalues half = 0/1 {
		generate z_`half' = .
		generate s_`half' = .
		quietly regress delta_fj f_* if split == `half', vce(robust) noconstant
		forvalues k = 1/`F' {
			replace z_`half' = _b[f_`k'] / _se[f_`k'] if f == `k'
			replace s_`half' = _se[f_`k'] if f == `k'
		}
	
	}
	*Remove firms whose standard error is exactly zero in either half.
	drop if s_0 == 0 | s_1 == 0
	
	***Expand to stacked firm-level data set
	*Two rows per firm: each row pairs the z-score from one half with the
	*standard error from the other half.
	bysort firm_id: keep if _n == 1
	expand 2
	bysort firm_id: generate sample = _n
	generate z = z_0
	replace z = z_1 if sample == 2
	generate s = s_1
	replace s = s_0 if sample == 2
	
	****Run regressions
	*Standard errors are clustered by firm because each firm contributes two rows.
	regress z s sample, vce(cluster firm_id)
	predict r, residuals
	generate r_2 = r^2
	regress r_2 s sample, vce(cluster firm_id)


****Run repeated splits to assess sensitivity

	*Preliminaries
	*T holds one row per trial: column 1 is the t-statistic on s from the z regression,
	*column 2 is the t-statistic on s from the squared-residual regression.
	local B = 1000
	matrix T = J(`B', 2, .)
	
	*Loop over trials
	forvalues b = 1/`B' {
	
		quietly {
	
			*Load data
			use "`tempsave'", clear
			
			*Split sample randomly
			generate u = uniform()
			sort firm_id u
			by firm_id: generate split = (_n <= _N / 2)
			
			***Estimate contact gaps in each split
			forvalues half = 0/1 {
				generate z_`half' = .
				generate s_`half' = .
				regress delta_fj f_* if split == `half', vce(robust) noconstant
				forvalues k = 1/`F' {
					replace z_`half' = _b[f_`k'] / _se[f_`k'] if f == `k'
					replace s_`half' = _se[f_`k'] if f == `k'
				}
			
			}
			*Remove firms whose standard error is exactly zero in either half.
			drop if s_0 == 0 | s_1 == 0
			
			***Expand to stacked firm-level data set
			*Each stacked row pairs the z-score from one half with the standard
			*error from the other half.
			bysort firm_id: keep if _n == 1
			expand 2
			bysort firm_id: generate sample = _n
			generate z = cond(sample == 2, z_1, z_0)
			generate s = cond(sample == 2, s_0, s_1)
			
			****Run regression
			regress z s sample, vce(cluster firm_id)
			matrix T[`b', 1] = _b[s] / _se[s]
			predict r, residuals
			generate r_2 = r^2
			regress r_2 s sample, vce(cluster firm_id)
			matrix T[`b', 2] = _b[s] / _se[s]
	
	
		}
		
		*Progress indicator: share of trials completed.
		display `b' / `B'
	
	}

	*Turn the matrix of t-statistics into variables and summarize across trials.
	clear
	svmat T
	summarize


		
